package processor;

import org.apache.http.HttpHost;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * Created by zhangcheng on 17/4/18.
 */
public class SinaPageProcessor implements PageProcessor {

    private Site site = Site.me()
            .setDomain("http://weibo.com/")
            .setUserAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36")
            .setRetryTimes(3)
            .setHttpProxy(new HttpHost("10.101.1.6",80))
            .setSleepTime(1000);
//            .addHeader("Content-Type","application/x-www-form-urlencoded")
//            .addHeader("Cookie","SINAGLOBAL=1592141140718.013.1447085595048; un=18007303287; TC-V5-G0=7975b0b5ccf92b43930889e90d938495; YF-Page-G0=3d55e26bde550ac7b0d32a2ad7d6fa53; SCF=AkWMTXlt24R4Huksml1QkFfmkTnE4Ag0ZxMY1z4BcsvFOIhbrhuPLiV1b67XXL05wjoQz0nWtIrfqdhLuU0abps.; SUHB=0OMUZdMeeB8-8x; YF-V5-G0=32427df11f152291036145f8d346cc49; _s_tentry=login.sina.com.cn; Apache=9953985294598.043.1493175483020; ULV=1493175483026:3:3:2:9953985294598.043.1493175483020:1492995236723; YF-Ugrow-G0=169004153682ef91866609488943c77f; SUB=_2AkMuXIIGdcPxrAZUnvsXyGLkao5H-jydievwAn7uJhMyAxgv7m4fqSUANLRLUT1Z_hG24zn9-GQMlpgp_g..; SUBP=0033WrSXqPxfM72wWs9jqgMF55529P9D9WWsYlU1OflQ6r8kw5BMGwxR5JpVF02Reh5Ne050eonN; login_sid_t=ce6247356edbede731f883024019b5f8; UOR=www.csdn.net,widget.weibo.com,login.sina.com.cn; WBStorage=02e13baf68409715|undefined")
//            .addHeader("Host","search.sina.com.cn")
//            .addHeader("Proxy-Authorization","NzYxNzg3Onp6bDY2MTAyOF4=")
//            .addHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36")
//            .addHeader("Proxy-Connection","keep-alive")
//            .addHeader("Referer","http://weibo.com/ttarticle/p/show?id=2309404098740582488478")
//            .addHeader("X-Requested-With","XMLHttpRequest");
    public void process(Page page) {
        System.out.println("pageinfo:");
        System.out.println(page.getHtml());
    }

    public SinaPageProcessor() throws Exception {
        //this.site = WeiBoMSelenium.click(this.site, "18007303287", "qwertsekfo1");
    }

    public Site getSite() {
        return site;
    }

    public static void main(String[] args) throws Exception {
        Spider.create(new SinaPageProcessor()).addUrl("http://search.sina.com.cn/?q=南方航空&c=news&from=index").run();
    }
}
